
**********
* Readme *
**********

* This script generates Table 2: Descriptive statistics for own-account workers and employees

* Notes: [1] These summary statistics were calculated using the National Household Survey (PNAD) and refer to all working-age individuals (14-64 years old), living in Brazil's urban areas, who reported being occupied as either own-account workers or wage employees. [2] The results represent the average over the 8 quarters of 2017-18, with the exception of the workplace information, which is only available for the 4 quarters of 2018. [3] Employment status, formality status, workplace, and occupation all refer to an individual's main employment. [4] A worker is classified as formal if registered either as a worker (``carteira assinada'') or as a small business (``CNPJ''). [5] The group of own-account workers includes domestic workers, who are by default defined as employees in the official figures from the Brazilian statistical office. This methodological decision is adopted throughout this paper. An overview of the results under the standard classification is available in the appendix.

* And Table 3: Most frequent occupations by labor market status

* Notes: The reported occupations are the most granular category (level 4) in the International Standard Classification of Occupations (ISCO). See also the notes for Table 2.


* Root folder (PATH TO BE DEFINED BY THE USER)
**********************************************
* $analysis is the root of the replication package; every input and output
* path below is built from it.
clear all
global analysis "C:/***/replication_package"


* Timestamped log
*****************
* Close any log left open by an earlier (possibly interrupted) run:
* -log using- aborts if a log is already open, and -clear all- does not
* close logs.
capture log close

* $today holds the current date as YYMMDD and is used as the log-file suffix.
global today = strofreal(date(c(current_date), "DMY"), "%tdYYNNDD")
log using "${analysis}/code/logs/3_3_descriptive_labor_status_${today}.smcl", replace


*******************************
* Table 2: Who are the OAWs ? *
*******************************

* PNAD data: load only the survey-design variables, the labor-status and
* occupation variables, and the attribute variables used in the tables.
use strata_id psu_id pweight ///
	work_state work_state_name occupation ///
	rg_* educ_* age_* formal wp_* ///
	using "${analysis}/data/2_4_pnad_clean.dta", clear

* Survey design: PSUs within strata, probability weights, and
* single-unit strata handled with the "centered" method.
svyset psu_id [pweight = pweight], strata(strata_id) singleunit(centered)

* Variables in the table, in the order in which they are reported
global variables rg_1_nonwhite_female rg_2_white_female rg_3_nonwhite_male rg_4_white_male
global variables $variables educ_1 educ_2 educ_3 educ_4
global variables $variables age_1 age_2 age_3 age_4 age_5
global variables $variables formal
global variables $variables wp_1 wp_2 wp_3 wp_4 wp_5 wp_6 wp_7 wp_8

* Snapshot of the trimmed data, reloaded by each table section below
tempfile full_data
save `full_data', replace


* Attributes
************

use `full_data', clear

* Population: each observation contributes 1/(8 * 10^6), so that a weighted
* total is expressed in millions and averaged over the 8 quarters.
gen pop = 1 / (8 * 10^6)

* Percentages: rescale every table attribute by 100 so that the weighted
* means computed below read as percentages.
foreach v of global variables {
	replace `v' = 100 * `v'
}


* OAW and Employee
******************

* For each labor market status, build a 1 x K row vector (matrices oaw and ee)
* holding the weighted subpopulation total of pop followed by the weighted
* mean of every attribute listed in $variables, in that order.
*
* Each attribute is estimated in its own -svy: mean- call. -mean- applies
* casewise deletion across its varlist, so a single call with all attributes
* would restrict EVERY statistic to the observations where all attributes are
* non-missing.
* NOTE(review): header note [2] says the workplace information is only
* available for 2018; presumably wp_* is missing in the 2017 quarters, in
* which case the joint call silently reduced all means to 2018 only.
* Confirm against 2_4_pnad_clean.dta.

tempname est

foreach status in "Own-account worker" "Employee" {

	* Matrix name and row label for this status
	if "`status'" == "Employee" {
		local m   ee
		local lab Employee
	}
	else {
		local m   oaw
		local lab OAW
	}

	* Subpopulation size (weighted total of pop)
	svy, subpop(if work_state_name == "`status'"): total pop
	matrix `m'_n = r(table)
	matrix `m'   = `m'_n["b", 1...]

	* Attribute means, one estimation per variable (available-case samples)
	foreach v of global variables {
		svy, subpop(if work_state_name == "`status'"): mean `v'
		matrix `est' = r(table)
		matrix `m'   = `m', `est'["b", 1...]
	}

	matrix rownames `m' = "`lab'"
}


* Combined results
******************

* Stack the two rows; column names (pop and the attribute names) are kept
* and are used later to name the variables of the results dataset.
matrix oaw_ee = oaw\ee
matrix list oaw_ee


* Dataset with results
**********************

* Turn the 2 x K matrix into a dataset (one variable per matrix column),
* format every value as a string with one decimal, then transpose so that
* each attribute becomes a row and each labor status a column.
clear
svmat oaw_ee, names(col)
tostring _all, replace format(%9.1f) force
sxpose2, clear force varname

* After the transpose: _varname holds the attribute name, _var1 the first
* matrix row (OAW) and _var2 the second (Employee).
rename (_varname _var1 _var2) (attribute oaw ee)


* Cleaning output
*****************

* Replace each internal variable name stored in attribute with the LaTeX
* fragment printed in the first column of the table. The first item of each
* group also carries the group header: "\tabularnewline" precedes the
* italic header, and "\\" ends the header row before the indented
* ("\hspace{2ex}") item label.
* The replacements are independent of each other: each one matches a
* distinct original variable name.

* Subpopulation total (row "pop")
replace attribute = "\textit{Subpopulation size (in millions)}" if attribute == "pop"

* Ethnicity and gender (rg_*)
replace attribute = "\tabularnewline \textit{Ethnicity and gender (in \%)} \\ \hspace{2ex} Nonwhite female" if attribute == "rg_1_nonwhite_female"
replace attribute = "\hspace{2ex} White female" if attribute == "rg_2_white_female"
replace attribute = "\hspace{2ex} Nonwhite male" if attribute == "rg_3_nonwhite_male"
replace attribute = "\hspace{2ex} White male" if attribute == "rg_4_white_male"

* Education level (educ_*)
replace attribute = "\tabularnewline \textit{Education level (in \%)} \\ \hspace{2ex} Less than prim. school" if attribute == "educ_1"
replace attribute = "\hspace{2ex} Primary school" if attribute == "educ_2"
replace attribute = "\hspace{2ex} High school" if attribute == "educ_3"
replace attribute = "\hspace{2ex} College or above" if attribute == "educ_4"

* Age group (age_*)
replace attribute = "\tabularnewline \textit{Age group (in \%)} \\ \hspace{2ex} Age 14-24" if attribute == "age_1"
replace attribute = "\hspace{2ex} Age 25-34" if attribute == "age_2"
replace attribute = "\hspace{2ex} Age 35-44" if attribute == "age_3"
replace attribute = "\hspace{2ex} Age 45-54" if attribute == "age_4"
replace attribute = "\hspace{2ex} Age 55-64" if attribute == "age_5"

* Formal work status (single row, header only)
replace attribute = "\tabularnewline \textit{Formal work status (in \%)}" if attribute == "formal"

* Usual workplace (wp_*)
replace attribute = "\tabularnewline \textit{Usual workplace (in \%)} \\ \hspace{2ex} Dedicated store, office" if attribute == "wp_1"
replace attribute = "\hspace{2ex} Place chosen by client, employer" if attribute == "wp_2"
replace attribute = "\hspace{2ex} Client's, employer's home" if attribute == "wp_3"
replace attribute = "\hspace{2ex} Worker's home (dedicated area)" if attribute == "wp_4"
replace attribute = "\hspace{2ex} Worker's home (shared area)" if attribute == "wp_5"
replace attribute = "\hspace{2ex} Worker's vehicle" if attribute == "wp_6"
replace attribute = "\hspace{2ex} Public space" if attribute == "wp_7"
replace attribute = "\hspace{2ex} Other places" if attribute == "wp_8"

* Print the final table to the log for inspection
list, separator(50) 


* TABLE 2
*********

* Export the three columns (attribute, oaw, ee) as LaTeX table rows.
* NOTE(review): nofix is presumably required so that texsave does not escape
* the LaTeX commands written into attribute above -- confirm against the
* texsave help file.
texsave * using "${analysis}/results/tables/table2.tex", dataonly replace nofix noendash


*************************************
* Table 3: What are the OAWs doing? *
*************************************

use `full_data', clear


* Occupation
************

* Following ISCO-08 https://en.wikipedia.org/wiki/International_Standard_Classification_of_Occupations

* Add prefix "10" to military occupations to avoid unintended merge with other groups
* (i.e. add 10000 to the codes 110, 210, 411, 412, 511 and 512)
replace occupation = occupation + 10000 if inlist(occupation, 110, 210, 411, 412, 511, 512)

* ISCO level 4: dividing and multiplying by 1 leaves integer codes unchanged,
* i.e. the full (most granular) code is used.
gen occupation_4 = floor(occupation/1)*1

* Define the standard ISCO labels
do "${analysis}/code/isco_labels.do" 

* Label the values
label values occupation_4 isco08_lbl

* Summary by work state
keep if inlist(work_state_name, "Own-account worker", "Employee")

* Weighted head count per observation, averaged over 8 quarters (each
* quarter represents the whole population). Stored as double to avoid
* rounding the weights before they are summed.
gen double freq = pweight/8
gcollapse (sum) freq, by(work_state_name occupation_4)
replace freq = freq/1000

* Share of each occupation within its work state (in %).
* -bysort- makes the sort explicit rather than relying on the order left
* behind by gcollapse.
bysort work_state_name: egen tot = total(freq)
gen prop = 100*freq/tot

* Rank occupations within work state by decreasing share; occupation_4 is
* a tie-breaker so that the order is reproducible when shares are equal.
gsort work_state_name -prop occupation_4
drop  freq tot

* Occupation name from the value label; codes without a label fall back to
* the numeric code instead of an empty name.
decode occupation_4, generate(occupation_str)
replace occupation_str = string(occupation_4) if occupation_str == "" & !missing(occupation_4)

* Table cell text: "<occupation>: (<share with one decimal>)", stored in the
* column of the corresponding work state (the other column stays empty).
tostring prop, generate(prop_str) format(%7.1f) force
generate prop_str_par = "(" + prop_str +")"
generate oaw = occupation_str + ": " + prop_str_par if work_state_name == "Own-account worker"
generate ee = occupation_str + ": " + prop_str_par if work_state_name == "Employee"

* Keep only the two text columns (occupation_* also drops occupation_str)
drop work_state_name prop occupation_* prop_str prop_str_par

tempfile all_occupations
save `all_occupations', replace


* Most common occupations for employee
**************************************

* Rows arrive sorted by work state and decreasing share, so the first ten
* employee rows are the ten most frequent employee occupations.
use `all_occupations', clear
drop oaw
keep if ee != ""
gen rank = _n
keep if rank <= 10
order rank, first

tempfile ee_occupations
save `ee_occupations', replace


* Most common occupations for OAW
*********************************

* Same selection as for employees: the first ten own-account rows in the
* pre-sorted data are the ten most frequent occupations.
use `all_occupations', clear
drop ee
keep if oaw != ""
gen rank = _n
keep if rank <= 10
order rank, first

* Put both rankings side by side, matched on rank
merge 1:1 rank using `ee_occupations', nogenerate force

* Ordinal labels: 1st, 2nd, 3rd, 4th, ..., 10th
tostring rank, replace
replace rank = rank + "th" if inlist(rank, "4", "5", "6", "7", "8", "9", "10")
replace rank = rank + "st" if rank == "1"
replace rank = rank + "nd" if rank == "2"
replace rank = rank + "rd" if rank == "3"

* Print the final table to the log for inspection
list, separator(50)


* TABLE 3
*********

texsave * using "${analysis}/results/tables/table3.tex", dataonly replace nofix noendash


* End of script
***************
capture log close